import json
import re
from tqdm import tqdm
def count_space(string):
    """Return the number of leading space characters in *string*.

    Only the literal ' ' character is counted; a tab or any other
    character ends the run. An empty string, or a string made up
    entirely of spaces, yields 0.
    """
    first_non_space = (
        position for position, char in enumerate(string) if char != ' '
    )
    # Fall back to 0 when every character (if any) is a space.
    return next(first_non_space, 0)



# Load the input mapping. Each value is read below as a dict with a
# 'description' string and a 'line_by_line' mapping.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default.
with open('./NEW_description_and_query.json', 'r', encoding='utf-8') as fp:
    data = json.load(fp)


print('dada')
# One numbered item: a line starting with "<digits>." followed by whitespace,
# extended lazily up to (but not including) the next line that begins with a
# non-whitespace character, or to the end of the text.
pattern = r'(?m)^\d+\.\s+[\s\S]*?(?=^\S|\Z)'
# Compiled once here instead of being resolved on every loop iteration.
numbered_item_re = re.compile(pattern)

new_data = []
for key, value in tqdm(data.items()):
    # Keep the original mapping key inside the record, since the output is a
    # flat list rather than a dict.
    value['key'] = key

    matches = [item.strip() for item in numbered_item_re.findall(value['description'])]
    if not matches:
        # Entries whose description contains no numbered item are dropped.
        continue
    value['description'] = '\n'.join(matches)

    # Flatten the 'line_by_line' mapping into a single text blob: each mapping
    # key is emitted preceded by a "# <value>" line indented by the key's
    # leading-space count (presumably key = code line, value = its explanation).
    annotated = []
    for code_line, note in value['line_by_line'].items():
        indent = ' ' * count_space(code_line)
        annotated.append(f'{indent}# {note}\n{code_line}\n')
    value['line_by_line'] = ''.join(annotated).strip()
    new_data.append(value)


# json.dump escapes non-ASCII by default, so the explicit encoding only makes
# the write independent of the locale; the bytes written are unchanged.
with open('./query_and_description_2.json', 'w', encoding='utf-8') as fp:
    json.dump(new_data, fp, indent=4)
print('dada')